#include "IOPipe.h"
#include <iterator>

#include "CharUtils.h"

using namespace std;
using namespace egstra;


namespace dparser {

	/**
	 * Fills in the derived per-token fields of an instance, driven by the
	 * pipe's configuration flags:
	 *   - _copy_cpostag_from_postag : cpostags[i] is overwritten with postags[i];
	 *   - _english                  : "hyp=", "num=" and "upc=" feature strings are
	 *                                 computed from the word form, otherwise the form
	 *                                 is split by getCharactersFromUTF8String();
	 *   - _constrained_tag          : pdeprels[i] is tokenized on "_" and the pieces
	 *                                 are stored in constrained_tags_str[i].
	 *
	 * @param inst  instance to update in place; must not be null.
	 */
	void IOPipe::preprocessInstance( Instance *inst)
	{
		const int token_count = inst->size();

		// Size the per-token containers up front; they are indexed in the loop below.
		if (!_english) {
			inst->chars.resize(token_count);
		} else {
			inst->contain_hyphen.resize(token_count);
			inst->contain_number.resize(token_count);
			inst->contain_uppercase_char.resize(token_count);
		}

		if (_constrained_tag) {
			// Empty the container first so that every slot is freshly
			// default-constructed, even if it held data from an earlier call.
			inst->constrained_tags_str.clear();
			inst->constrained_tags_str.resize(token_count);
		}

		// Position 0 is deliberately skipped (presumably the artificial root
		// token -- confirm against Instance).
		for (int pos = 1; pos < token_count; ++pos) {
			if (_copy_cpostag_from_postag) {
				inst->cpostags[pos] = inst->postags[pos];
			}

			if (!_english) {
				getCharactersFromUTF8String(inst->forms[pos], inst->chars[pos]);
			} else {
				const string &word = inst->forms[pos];
				const bool has_hyphen = contain_hyphen(word);
				const bool has_number = contain_number(word);
				const bool has_upper = contain_uppercase_character(word);
				inst->contain_hyphen[pos] = has_hyphen ? "hyp=y" : "hyp=n";
				inst->contain_number[pos] = has_number ? "num=y" : "num=n";
				inst->contain_uppercase_char[pos] = has_upper ? "upc=y" : "upc=n";
			}

			if (_constrained_tag) {
				vector<string> pieces;
				simpleTokenize(inst->pdeprels[pos], pieces, "_");
				// Leave the slot empty when tokenization produced nothing.
				if (!pieces.empty()) {
					inst->constrained_tags_str[pos] = pieces;
				}
			}
		}
	}

	void IOPipe::getInstancesFromInputFile( const int startId /*= 0*/, const int maxInstNum/*=-1*/, const int instMaxLen/*=-1*/ )
	{
		cerr << "Get all instances from " << m_inf_name; print_time();
		dealloc_instance();

		_start_id = startId;

		int inst_thrown_ctr = 0;
		while (1) {
			const size_t this_posi = _inf_current_posi;
			const int this_id = startId + getInstanceNum();

			Instance * const inst = m_reader->getNext(this_id, _inf_current_posi);
			if (!inst) break;
			if (inst->forms.size() != inst->cpostags.size()) {
				cerr << "[BF " << inst_thrown_ctr++ << ":" << inst->size() << "] "; // Wenliang's data
				delete inst;
				continue;
			}

			if (instMaxLen > 0 && inst->size() > instMaxLen) { // to be consistent with the old version.
				cerr << " [" << inst_thrown_ctr++ << ":" << inst->size() << "] ";
				delete inst;
			} else {
				if (_use_instances_posi) {
					delete inst;
					m_instances_posi.push_back(this_posi);
				} else {
					m_instances.push_back(inst);
					preprocessInstance(inst);
				}
			}

			if (maxInstNum > 0 && getInstanceNum() == maxInstNum) break;
		}
		
		fillVecInstIdxToRead();

		cerr << "\ninstance num: " << getInstanceNum() << endl;
		cerr << "Done!"; print_time();
	}
}


